1.0 Load and Define

1.1 Load packages and presets

1.1.1 Packages

library("pacman")
pacman::p_load("rmarkdown", "tidyverse", "phyloseq", "knitr","kableExtra", "here", "plyr", "ggpubr", "microViz", "microbiome", "pheatmap", "vegan", "reshape2", "magrittr", "microshades", "pheatmap","vegan", "data.table", "Polychrome", "fantaxtic","cetcolor", "cowplot", "MicrobiomeStat", "randomForest", "caret", "mlbench", "MLmetrics", "mia", "here", "patchwork", "digest", "ANCOMBC", "Maaslin2", "microbiomeMarker")

here::i_am("01_phyloseq.Rmd")

1.1.2 My packages

source("MK_Microbiome_Functions.R")

1.1.2 Colors and presents

colors_all= c("Abscess" = "#FF495C", "Plaque"="#083D77")


core_colors = c("#FAAA00", "#3399FF","#F76F8E","#083D77","#B8D4E3", "#FF495C","#477071", '#03CEA4',  "#5F00BA", "#BDAC9E", "white", "#FFD900")

maaslin2_colors= c("CLR_LOG" = "#2c45b5", "CLR_NONE"="#86B8FD", 
                   "CSS_LOG" = "#FF495C", "CSS_NONE"="#F2929A",
                   "TMM_LOG" = "#F4B701", "TMM_NONE"="#FEE59A",
                   "TSS_LOG" = "#8FD694", "TSS_NONE"="#D2EED4",
                   "TSS_LOGIT" = "#354F52", "TSS_AST"="#CEDDDF")

pal20 <- createPalette(22, c("#F76F8E","#03CEA4", "#083D77"), range = c(30, 80))
pal20 <- unname(as.list(pal20))

resolution <- c("Phylum", "Class", "Order", "Family", "Genus", "Species")

set.seed(1234)
date = Sys.Date()

1.2 Load data and process

1.2.1 Load Data from 00

# Load the saved Rds file
loaded_objects <- readRDS("00data.rds")

# Assign back the objects to the environment, if needed
list2env(loaded_objects, .GlobalEnv)
## <environment: R_GlobalEnv>

1.2.2 Process Phyloseq

process_phyloseq(ps_fs)

2.0 Ordinations

2.1 PCA

plots <- lapply(resolution, function(rank) {
  plot_pca(phyloseq_obj = ps_f, 
           rank_transformation = rank, 
           variable = "Type", 
           colors_list = colors_all)})

combined_plot <- wrap_plots(plots, ncol = 2) & theme(legend.position = "bottom") 
combined_plot

2.2 PCoA NMDS Bray

plots <- lapply(resolution, function(rank) {
  plot_PCoA(phyloseq_obj = ps_f,
    rank_transformation = rank,
    trans_type = "identity",        
    dist_cal_type = "bray",   
    ord_calc_method = "NMDS",
    variable = "Type", 
    colors_list = colors_all)})
## Run 0 stress 0.1066463 
## Run 1 stress 0.1049526 
## ... New best solution
## ... Procrustes: rmse 0.05206063  max resid 0.2425043 
## Run 2 stress 0.1061714 
## Run 3 stress 0.1066132 
## Run 4 stress 0.1310523 
## Run 5 stress 0.1153191 
## Run 6 stress 0.1025361 
## ... New best solution
## ... Procrustes: rmse 0.03649304  max resid 0.2340134 
## Run 7 stress 0.1047739 
## Run 8 stress 0.1059989 
## Run 9 stress 0.1263261 
## Run 10 stress 0.1131383 
## Run 11 stress 0.1050426 
## Run 12 stress 0.1077247 
## Run 13 stress 0.1236921 
## Run 14 stress 0.1158842 
## Run 15 stress 0.1148456 
## Run 16 stress 0.1143097 
## Run 17 stress 0.1221283 
## Run 18 stress 0.4032103 
## Run 19 stress 0.1018337 
## ... New best solution
## ... Procrustes: rmse 0.05987405  max resid 0.257071 
## Run 20 stress 0.1076883 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     19: stress ratio > sratmax
##      1: scale factor of the gradient < sfgrmin
## Run 0 stress 0.1350544 
## Run 1 stress 0.1475462 
## Run 2 stress 0.1361029 
## Run 3 stress 0.1488209 
## Run 4 stress 0.1480776 
## Run 5 stress 0.1773815 
## Run 6 stress 0.1480795 
## Run 7 stress 0.1332653 
## ... New best solution
## ... Procrustes: rmse 0.02664107  max resid 0.159787 
## Run 8 stress 0.1476862 
## Run 9 stress 0.1557436 
## Run 10 stress 0.1348111 
## Run 11 stress 0.1490414 
## Run 12 stress 0.1332649 
## ... New best solution
## ... Procrustes: rmse 0.0003372176  max resid 0.002044453 
## ... Similar to previous best
## Run 13 stress 0.1429411 
## Run 14 stress 0.133265 
## ... Procrustes: rmse 0.0001662819  max resid 0.001007413 
## ... Similar to previous best
## Run 15 stress 0.1332649 
## ... Procrustes: rmse 0.0001058598  max resid 0.0006428467 
## ... Similar to previous best
## Run 16 stress 0.1347425 
## Run 17 stress 0.1557432 
## Run 18 stress 0.1549664 
## Run 19 stress 0.1480486 
## Run 20 stress 0.148802 
## *** Best solution repeated 3 times
## Run 0 stress 0.1368049 
## Run 1 stress 0.1518351 
## Run 2 stress 0.1561028 
## Run 3 stress 0.1366913 
## ... New best solution
## ... Procrustes: rmse 0.01624266  max resid 0.08608591 
## Run 4 stress 0.1514995 
## Run 5 stress 0.1590158 
## Run 6 stress 0.1374996 
## Run 7 stress 0.1366913 
## ... New best solution
## ... Procrustes: rmse 4.994881e-06  max resid 1.552576e-05 
## ... Similar to previous best
## Run 8 stress 0.1374997 
## Run 9 stress 0.1515008 
## Run 10 stress 0.1514994 
## Run 11 stress 0.1368049 
## ... Procrustes: rmse 0.0162463  max resid 0.08606053 
## Run 12 stress 0.1565967 
## Run 13 stress 0.1374997 
## Run 14 stress 0.1366913 
## ... New best solution
## ... Procrustes: rmse 1.5123e-06  max resid 4.563119e-06 
## ... Similar to previous best
## Run 15 stress 0.1467489 
## Run 16 stress 0.1374999 
## Run 17 stress 0.1535182 
## Run 18 stress 0.1375741 
## Run 19 stress 0.1565958 
## Run 20 stress 0.1374998 
## *** Best solution repeated 1 times
## Run 0 stress 0.1605509 
## Run 1 stress 0.1753006 
## Run 2 stress 0.1571292 
## ... New best solution
## ... Procrustes: rmse 0.0771428  max resid 0.3790885 
## Run 3 stress 0.1550772 
## ... New best solution
## ... Procrustes: rmse 0.07953198  max resid 0.3833288 
## Run 4 stress 0.1766037 
## Run 5 stress 0.1620363 
## Run 6 stress 0.1550774 
## ... Procrustes: rmse 0.0001656599  max resid 0.001041481 
## ... Similar to previous best
## Run 7 stress 0.1679841 
## Run 8 stress 0.1613913 
## Run 9 stress 0.154637 
## ... New best solution
## ... Procrustes: rmse 0.08632581  max resid 0.3842954 
## Run 10 stress 0.1595972 
## Run 11 stress 0.1653283 
## Run 12 stress 0.1703345 
## Run 13 stress 0.1732703 
## Run 14 stress 0.1653148 
## Run 15 stress 0.1566094 
## Run 16 stress 0.1561383 
## Run 17 stress 0.1551956 
## Run 18 stress 0.1851218 
## Run 19 stress 0.171435 
## Run 20 stress 0.1613622 
## *** Best solution was not repeated -- monoMDS stopping criteria:
##     20: stress ratio > sratmax
## Run 0 stress 0.1604567 
## Run 1 stress 0.1875855 
## Run 2 stress 0.171959 
## Run 3 stress 0.1585018 
## ... New best solution
## ... Procrustes: rmse 0.0287409  max resid 0.1917533 
## Run 4 stress 0.1566599 
## ... New best solution
## ... Procrustes: rmse 0.0344392  max resid 0.1696109 
## Run 5 stress 0.4032487 
## Run 6 stress 0.1574851 
## Run 7 stress 0.1559057 
## ... New best solution
## ... Procrustes: rmse 0.04901475  max resid 0.1990234 
## Run 8 stress 0.1866378 
## Run 9 stress 0.1566554 
## Run 10 stress 0.1559057 
## ... New best solution
## ... Procrustes: rmse 4.180679e-05  max resid 0.0002511405 
## ... Similar to previous best
## Run 11 stress 0.1574851 
## Run 12 stress 0.1591443 
## Run 13 stress 0.1581012 
## Run 14 stress 0.171658 
## Run 15 stress 0.1604198 
## Run 16 stress 0.1566601 
## Run 17 stress 0.1602859 
## Run 18 stress 0.1861975 
## Run 19 stress 0.1573254 
## Run 20 stress 0.1594623 
## *** Best solution repeated 1 times
## Run 0 stress 0.2039703 
## Run 1 stress 0.1788125 
## ... New best solution
## ... Procrustes: rmse 0.1003606  max resid 0.4932343 
## Run 2 stress 0.1814444 
## Run 3 stress 0.1787555 
## ... New best solution
## ... Procrustes: rmse 0.005353051  max resid 0.0310641 
## Run 4 stress 0.186015 
## Run 5 stress 0.1787556 
## ... Procrustes: rmse 0.0003712776  max resid 0.002320546 
## ... Similar to previous best
## Run 6 stress 0.178813 
## ... Procrustes: rmse 0.005359301  max resid 0.0311555 
## Run 7 stress 0.1788127 
## ... Procrustes: rmse 0.005355729  max resid 0.03112112 
## Run 8 stress 0.1813042 
## Run 9 stress 0.1830781 
## Run 10 stress 0.1921849 
## Run 11 stress 0.183279 
## Run 12 stress 0.2292839 
## Run 13 stress 0.2226254 
## Run 14 stress 0.22821 
## Run 15 stress 0.1814443 
## Run 16 stress 0.1948341 
## Run 17 stress 0.2229079 
## Run 18 stress 0.2307822 
## Run 19 stress 0.1813041 
## Run 20 stress 0.192167 
## *** Best solution repeated 1 times
combined_plot <- wrap_plots(plots, ncol = 2) & theme(legend.position = "bottom") 
print(combined_plot)

2.3 PCoA NMDS Jaccard

plots <- lapply(resolution, function(rank) {
  plot_PCoA(phyloseq_obj = ps_f,
    rank_transformation = rank,
    trans_type = "identity",        
    dist_cal_type = "jaccard",   
    ord_calc_method = "NMDS",
        variable = "Type", 
    colors_list = colors_all)})
## Run 0 stress 0.1066463 
## Run 1 stress 0.1057707 
## ... New best solution
## ... Procrustes: rmse 0.05053757  max resid 0.2536816 
## Run 2 stress 0.1047739 
## ... New best solution
## ... Procrustes: rmse 0.04847138  max resid 0.220279 
## Run 3 stress 0.105078 
## ... Procrustes: rmse 0.04821287  max resid 0.2410887 
## Run 4 stress 0.1163004 
## Run 5 stress 0.1025361 
## ... New best solution
## ... Procrustes: rmse 0.03633134  max resid 0.2340608 
## Run 6 stress 0.1025361 
## ... New best solution
## ... Procrustes: rmse 1.001813e-05  max resid 2.395668e-05 
## ... Similar to previous best
## Run 7 stress 0.1018337 
## ... New best solution
## ... Procrustes: rmse 0.05987319  max resid 0.2570684 
## Run 8 stress 0.103259 
## Run 9 stress 0.136881 
## Run 10 stress 0.1129523 
## Run 11 stress 0.1047739 
## Run 12 stress 0.1025361 
## Run 13 stress 0.1131383 
## Run 14 stress 0.4025584 
## Run 15 stress 0.1128669 
## Run 16 stress 0.1049527 
## Run 17 stress 0.1018337 
## ... New best solution
## ... Procrustes: rmse 3.711645e-06  max resid 1.768663e-05 
## ... Similar to previous best
## Run 18 stress 0.1075083 
## Run 19 stress 0.1018337 
## ... Procrustes: rmse 2.410012e-05  max resid 0.0001435325 
## ... Similar to previous best
## Run 20 stress 0.1131384 
## *** Best solution repeated 2 times
## Run 0 stress 0.1350538 
## Run 1 stress 0.1480539 
## Run 2 stress 0.1833202 
## Run 3 stress 0.1347425 
## ... New best solution
## ... Procrustes: rmse 0.03267226  max resid 0.1615236 
## Run 4 stress 0.1429413 
## Run 5 stress 0.1332653 
## ... New best solution
## ... Procrustes: rmse 0.04775592  max resid 0.1840863 
## Run 6 stress 0.1735385 
## Run 7 stress 0.1347425 
## Run 8 stress 0.1350537 
## Run 9 stress 0.1347425 
## Run 10 stress 0.1350539 
## Run 11 stress 0.1414057 
## Run 12 stress 0.1361029 
## Run 13 stress 0.1429414 
## Run 14 stress 0.133265 
## ... New best solution
## ... Procrustes: rmse 0.0003633362  max resid 0.002205083 
## ... Similar to previous best
## Run 15 stress 0.1434113 
## Run 16 stress 0.1488209 
## Run 17 stress 0.1347425 
## Run 18 stress 0.1348111 
## Run 19 stress 0.1429416 
## Run 20 stress 0.1347425 
## *** Best solution repeated 1 times
## Run 0 stress 0.1368049 
## Run 1 stress 0.1374343 
## Run 2 stress 0.1374997 
## Run 3 stress 0.1366913 
## ... New best solution
## ... Procrustes: rmse 0.01623983  max resid 0.08608204 
## Run 4 stress 0.1374346 
## Run 5 stress 0.1522454 
## Run 6 stress 0.1561036 
## Run 7 stress 0.1374998 
## Run 8 stress 0.1522455 
## Run 9 stress 0.1366913 
## ... Procrustes: rmse 6.306625e-06  max resid 3.586926e-05 
## ... Similar to previous best
## Run 10 stress 0.1467492 
## Run 11 stress 0.1374997 
## Run 12 stress 0.1525599 
## Run 13 stress 0.1374997 
## Run 14 stress 0.1374997 
## Run 15 stress 0.1478653 
## Run 16 stress 0.151835 
## Run 17 stress 0.1374343 
## Run 18 stress 0.1368049 
## ... Procrustes: rmse 0.01623782  max resid 0.08603796 
## Run 19 stress 0.156671 
## Run 20 stress 0.1374342 
## *** Best solution repeated 1 times
## Run 0 stress 0.1605542 
## Run 1 stress 0.1676194 
## Run 2 stress 0.1550773 
## ... New best solution
## ... Procrustes: rmse 0.04413878  max resid 0.1769119 
## Run 3 stress 0.1787994 
## Run 4 stress 0.1712338 
## Run 5 stress 0.1550773 
## ... New best solution
## ... Procrustes: rmse 0.0002262501  max resid 0.001415786 
## ... Similar to previous best
## Run 6 stress 0.2007449 
## Run 7 stress 0.1567091 
## Run 8 stress 0.1549193 
## ... New best solution
## ... Procrustes: rmse 0.08677906  max resid 0.383247 
## Run 9 stress 0.1607042 
## Run 10 stress 0.1605542 
## Run 11 stress 0.1613623 
## Run 12 stress 0.1962691 
## Run 13 stress 0.1732703 
## Run 14 stress 0.1967747 
## Run 15 stress 0.1620367 
## Run 16 stress 0.1546372 
## ... New best solution
## ... Procrustes: rmse 0.007032439  max resid 0.04715204 
## Run 17 stress 0.1552009 
## Run 18 stress 0.1549225 
## ... Procrustes: rmse 0.007012762  max resid 0.04672281 
## Run 19 stress 0.1546371 
## ... New best solution
## ... Procrustes: rmse 0.0001720636  max resid 0.0009690418 
## ... Similar to previous best
## Run 20 stress 0.157129 
## *** Best solution repeated 1 times
## Run 0 stress 0.1604567 
## Run 1 stress 0.1566555 
## ... New best solution
## ... Procrustes: rmse 0.04263941  max resid 0.192425 
## Run 2 stress 0.1878476 
## Run 3 stress 0.1559023 
## ... New best solution
## ... Procrustes: rmse 0.04992947  max resid 0.1996929 
## Run 4 stress 0.1597307 
## Run 5 stress 0.1574851 
## Run 6 stress 0.1597304 
## Run 7 stress 0.1597304 
## Run 8 stress 0.15666 
## Run 9 stress 0.1585018 
## Run 10 stress 0.1646555 
## Run 11 stress 0.1587563 
## Run 12 stress 0.1559057 
## ... Procrustes: rmse 0.0007937382  max resid 0.004327232 
## ... Similar to previous best
## Run 13 stress 0.15973 
## Run 14 stress 0.1645196 
## Run 15 stress 0.1773453 
## Run 16 stress 0.1587564 
## Run 17 stress 0.1597299 
## Run 18 stress 0.1821974 
## Run 19 stress 0.1719592 
## Run 20 stress 0.1581164 
## *** Best solution repeated 1 times
## Run 0 stress 0.2039699 
## Run 1 stress 0.2013584 
## ... New best solution
## ... Procrustes: rmse 0.08880037  max resid 0.4907944 
## Run 2 stress 0.194975 
## ... New best solution
## ... Procrustes: rmse 0.04637722  max resid 0.1824759 
## Run 3 stress 0.1814443 
## ... New best solution
## ... Procrustes: rmse 0.04770941  max resid 0.2963757 
## Run 4 stress 0.1969431 
## Run 5 stress 0.1813042 
## ... New best solution
## ... Procrustes: rmse 0.005245771  max resid 0.03029579 
## Run 6 stress 0.1788126 
## ... New best solution
## ... Procrustes: rmse 0.02566002  max resid 0.1718211 
## Run 7 stress 0.1787554 
## ... New best solution
## ... Procrustes: rmse 0.005356416  max resid 0.03111948 
## Run 8 stress 0.1813044 
## Run 9 stress 0.192167 
## Run 10 stress 0.1860822 
## Run 11 stress 0.1813044 
## Run 12 stress 0.1814443 
## Run 13 stress 0.1860149 
## Run 14 stress 0.1830761 
## Run 15 stress 0.1922337 
## Run 16 stress 0.2057518 
## Run 17 stress 0.1787557 
## ... Procrustes: rmse 0.0001595222  max resid 0.0009915646 
## ... Similar to previous best
## Run 18 stress 0.1922336 
## Run 19 stress 0.194939 
## Run 20 stress 0.1967627 
## *** Best solution repeated 1 times
combined_plot <- wrap_plots(plots, ncol = 2) & theme(legend.position = "bottom") 
print(combined_plot)

3.0 Alpha diversity

a_my_comparisons <- list( c("Abscess", "Plaque"))
symnum.args = list(cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("****", "***", "**", "*", "ns"))

p_Shannon <- plot_richness(ps_f, x="Type", measures="Shannon", color = "Type")+
  geom_boxplot(alpha=0.6)+ 
    theme_bw(base_size=14) +
  theme(legend.position="none",         
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 12), 
        axis.title.x = element_blank(),
        axis.title.y = element_text(size=12), 
        strip.background = element_rect(fill = "white", color = "white"), 
        strip.text = element_text(size = 14, face = "bold"))  +
  stat_compare_means(method = "wilcox.test", comparisons = a_my_comparisons, label = "p.format")+
  scale_color_manual(values = colors_all) +
  ylim(0, 6.2)


p_Chao1 <- plot_richness(ps_f, x="Type", measures="Chao1", color = "Type")+
  geom_boxplot(alpha=0.6)+ 
    theme_bw(base_size=14) +
  theme(legend.position="none",         
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 12), 
        axis.title.x = element_blank(),
        axis.title.y = element_text(size=12), 
        strip.background = element_rect(fill = "white", color = "white"), 
        strip.text = element_text(size = 14, face = "bold"))  +
  stat_compare_means(method = "wilcox.test", comparisons = a_my_comparisons, label = "p.format")+
  scale_color_manual(values = colors_all) +
  ylim(0, 470)


p_Observed <- plot_richness(ps_f, x="Type", measures="Observed", color = "Type")+
  geom_boxplot(alpha=0.6)+ 
    theme_bw(base_size=14) +
  theme(legend.position="none",         
        axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1, size = 12), 
        axis.title.x = element_blank(),
        axis.title.y = element_text(size=12), 
        strip.background = element_rect(fill = "white", color = "white"), 
        strip.text = element_text(size = 14, face = "bold"))  +
  stat_compare_means(method = "wilcox.test", comparisons = a_my_comparisons, label = "p.format")+
  scale_color_manual(values = colors_all) +
  ylim(0, 470)

plot <- ggarrange(p_Shannon, p_Observed, p_Chao1, ncol=3, nrow=1, common.legend = TRUE, legend="bottom")
annotate_figure(plot, top = text_grob(paste0("Alpha Diversity"), color = "black", face = "bold", size = 14))

4.0 Taxa Plots

4.1 Average Bar Plots

4.1.1 Phylum

top_df <- ps_fs_t_n %>%
  tax_glom(., "Phylum") %>%
  get_top_taxa(., 10, relative = TRUE, discard_other = TRUE) %>% psmelt(.)
top_df$Phylum <- gsub("p__","",as.character(top_df$Phylum))

top_df %>%
  dplyr::group_by(Type,Phylum)%>%
  dplyr::summarise(Average_Abundance = mean(Abundance))%>%
  ggbarplot(x= "Type", y="Average_Abundance",
            fill = "Phylum",
            panel.labs.font = list(size=12),
            panel.labs.background = list(color = NULL, fill = "white"),
            font.tickslab = 14,
            font.legend = c(10),
            palette = core_colors, 
            strip.position = "top",
            color = "black",
            x.text.angle = 45, 
            y.text.angle = 0, 
            font.x = 14,
            font.y = 12,
            font.title = 16,
            rotate = FALSE,
            title = "Phlyum Level", 
            xlab = "",
            ylab = "Average Relative Abundance",
            ggtheme = theme_bw()) + 
  font("title", face="bold") +
  theme(plot.title=element_text(hjust=0.5), plot.subtitle=element_text(hjust=0.5))

4.1.2 Genus

top_df <- ps_fs_t_n %>%
  tax_glom(., "Genus") %>%
  get_top_taxa(., 10, relative = TRUE, discard_other = TRUE) %>% psmelt(.)
top_df$Genus <- gsub("g__","",as.character(top_df$Genus))

top_df %>%
  dplyr::group_by(Type,Genus)%>%
  dplyr::summarise(Average_Abundance = mean(Abundance))%>%
  ggbarplot(x= "Type", y="Average_Abundance",
            fill = "Genus",
            panel.labs.font = list(size=12),
            panel.labs.background = list(color = NULL, fill = "white"),
            font.tickslab = 14,
            font.legend = c(10),
            palette = core_colors, 
            strip.position = "top",
            color = "black",
            x.text.angle = 45, 
            y.text.angle = 0, 
            font.x = 14,
            font.y = 12,
            font.title = 16,
            rotate = FALSE,
            title = "Top 10 Genera", 
            xlab = "",
            ylab = "Average Relative Abundance",
            ggtheme = theme_bw()) + 
  font("title", face="bold") +
  theme(plot.title=element_text(hjust=0.5), plot.subtitle=element_text(hjust=0.5), legend.text = element_text(face = "italic"))

4.1.3 Microshades Genus MB

# Use microshades function prep_mdf to agglomerate, normalize, and melt the phyloseq object
mdf_prep <- prep_mdf(ps_fs)

mdf_prep <- mdf_prep %>%
  mutate(Kingdom = gsub("k__", "", Kingdom)) %>%
  mutate(Phylum = gsub("p__", "", Phylum)) %>%
  mutate(Class = gsub("c__", "", Class)) %>%
  mutate(Order = gsub("o__", "", Order)) %>%
  mutate(Family = gsub("f__", "", Family)) %>%
  mutate(Genus = gsub("g__", "", Genus)) %>%
  mutate(Species = gsub("s__", "", Species)) 

print(unique(mdf_prep$Phylum))
##  [1] "Firmicutes"               "Fusobacteria"            
##  [3] "Proteobacteria"           "Bacteroidetes"           
##  [5] "Actinobacteria"           "Spirochaetes"            
##  [7] "Saccharibacteria_(TM7)"   "Synergistetes"           
##  [9] "Tenericutes"              "Absconditabacteria_(SR1)"
## [11] "Gracilibacteria_(GN02)"   "Chloroflexi"
# Create a color object for the specified data
color_obj <- create_color_dfs(mdf_prep, 
            selected_groups = c("Proteobacteria", "Actinobacteria","Bacteroidetes", "Fusobacteria",
                                "Spirochaetes"),
    cvd = FALSE, top_orientation=TRUE)

# Extract
mdf <- color_obj$mdf
cdf <- color_obj$cdf

plot_1 <- plot_microshades(mdf, cdf)

plot_1 + scale_y_continuous(labels = scales::percent, expand = expansion(0)) +
  theme_bw() +
  theme(legend.key.size = unit(0.2, "cm"), text=element_text(size=10), ) +
  theme(axis.text.x = element_text(size= 6)) +
  facet_wrap(~Type, scales= "free_x") 

4.1.4 Species

top_df <- ps_fs_t_n %>%
  tax_glom(., "Species") %>%
  get_top_taxa(., 10, relative = TRUE, discard_other = TRUE) %>% psmelt(.)
top_df$Species <- gsub("s__","",as.character(top_df$Species))

top_df %>%
  dplyr::group_by(Type,Species)%>%
  dplyr::summarise(Average_Abundance = mean(Abundance))%>%
  ggbarplot(x= "Type", y="Average_Abundance",
            fill = "Species",
            panel.labs.font = list(size=12),
            panel.labs.background = list(color = NULL, fill = "white"),
            font.tickslab = 10,
            font.legend = c(10),
            palette = pal20, 
            strip.position = "top",
            color = "black",
            x.text.angle = 90, 
            y.text.angle = 0, 
            font.x = 12,
            font.y = 12,
            font.title = 16,
            rotate = TRUE,
            title = "", 
            subtitle = "Species level",
            xlab = "Condition",
            ylab = "Average Abundance",
            ggtheme = theme_bw()) + 
  font("title", face="bold") +
  theme(plot.title=element_text(hjust=0.5), plot.subtitle=element_text(hjust=0.5))

4.2 Per taxa plots

4.2.1 Genus TSS Dot

plot_all_taxa(ps_fs, "TSS", "Type", "Genus", "dot", colors_all)

4.2.1 Genus CSS Dot

plot_all_taxa(ps_fs, "CLR", "Type", "Genus", "dot", colors_all)

4.2.2 Selected Species Plot

source("MK_Microbiome_Functions.R")

plot_all_taxa(ps_obj = ps_fs,
              transformation = "TSS",
              group = "Type",
              taxa_level = "Species",
              plot_type = "dot",
              plot_colors = colors_all,
              taxa_filter = c("g__Fusobacterium_s__nucleatum", "g__Fusobacterium_s__periodonticum"))

4.2.2 Genus TSS Dot

plot_all_taxa(ps_fs, "TSS", "Type", "Species", "dot", colors_all)

Differential Abundance

6.1 ANCOMBC2

6.1.1 Phylum

runANCOM(ps_fs, 
         tax_level = "Phylum", 
         group = "Type", 
         name_of_saved_results = "ancombc_results_phy", 
         plot_type = "dot",
         Log2FC_cutoff = 0.5,
         plot_heights = c(1,2), 
         plot_colors = colors_all)
## [1] "Column found: diff_TypePlaque"
## [1] "Column found: lfc_TypePlaque"
## [1] "Column found: passed_ss_TypePlaque"
## [1] "Column found: se_TypePlaque"
## [1] "Column found: p_TypePlaque"
##               taxon lfc_(Intercept) lfc_TypePlaque se_(Intercept) se_TypePlaque
## 1 p__Proteobacteria      -0.5582016       1.267529      0.2374937     0.4041508
## 2   p__Spirochaetes       1.0709010      -2.353296      0.2442910     0.4072588
##   W_(Intercept) W_TypePlaque p_(Intercept) p_TypePlaque q_(Intercept)
## 1     -2.350385     3.136278  0.0233020953 3.047664e-03   0.163114667
## 2      4.383711    -5.778380  0.0001593383 3.801161e-06   0.001274707
##   q_TypePlaque diff_(Intercept) diff_TypePlaque passed_ss_(Intercept)
## 1 2.133365e-02            FALSE            TRUE                 FALSE
## 2 3.040929e-05             TRUE            TRUE                 FALSE
##   passed_ss_TypePlaque Enrichment       color
## 1                FALSE     Plaque       black
## 2                 TRUE    Abscess aquamarine3

6.1.2 Genus

runANCOM(ps_fs, 
         tax_level = "Genus", 
         group = "Type", 
         name_of_saved_results = "ancombc_results_gen", 
         plot_type = "dot",
         plot_heights = c(1, 8), 
         plot_colors = colors_all )
## [1] "Column found: diff_TypePlaque"
## [1] "Column found: lfc_TypePlaque"
## [1] "Column found: passed_ss_TypePlaque"
## [1] "Column found: se_TypePlaque"
## [1] "Column found: p_TypePlaque"
##                       taxon lfc_(Intercept) lfc_TypePlaque se_(Intercept)
## 1           g__Peptidiphaga      -1.2109685       2.060273      0.1779828
## 2           g__Leptotrichia      -0.7916919       1.764869      0.2528025
## 3            g__Actinomyces      -0.9026368       1.752104      0.1963494
## 4        g__Corynebacterium      -0.6721630       1.319667      0.1769740
## 5            g__Johnsonella       0.9087141      -1.448142      0.1175551
## 6            g__Peptococcus       0.8377672      -1.614673      0.1256773
## 7    g__Bacteroidales_[G-2]       1.0074565      -1.690295      0.2058193
## 8  g__Veillonellaceae_[G-1]       0.4572095      -1.746030      0.1396265
## 9              g__Dialister       0.7303205      -1.840150      0.2949759
## 10    g__Lacticaseibacillus       0.7570866      -1.978970      0.2491851
## 11             g__Treponema       1.0709010      -2.353296      0.2327088
##    se_TypePlaque W_(Intercept) W_TypePlaque p_(Intercept) p_TypePlaque
## 1      0.3816722     -6.803852     5.398017  1.292583e-06 2.772340e-05
## 2      0.3893562     -3.131661     4.532788  3.338330e-03 5.646085e-05
## 3      0.3431105     -4.597095     5.106531  4.233296e-05 8.426682e-06
## 4      0.3106539     -3.798088     4.248028  5.752794e-04 1.581871e-04
## 5      0.2485282      7.730115    -5.826873  9.038769e-06 1.146942e-04
## 6      0.2540781      6.666020    -6.355029  5.421897e-06 9.549968e-06
## 7      0.3360720      4.894860    -5.029564  1.166260e-04 8.709083e-05
## 8      0.2729455      3.274518    -6.396992  6.036560e-03 2.354500e-05
## 9      0.4097949      2.475865    -4.490417  2.012503e-02 1.290207e-04
## 10     0.3456976      3.038250    -5.724570  1.030735e-02 9.536906e-05
## 11     0.3791365      4.601893    -6.206989  8.877626e-05 1.228472e-06
##    q_(Intercept) q_TypePlaque diff_(Intercept) diff_TypePlaque
## 1   7.109205e-05 1.441617e-03             TRUE            TRUE
## 2   1.368715e-01 2.879503e-03            FALSE            TRUE
## 3   2.158981e-03 4.634675e-04             TRUE            TRUE
## 4   2.646285e-02 7.276607e-03             TRUE            TRUE
## 5   4.790548e-04 5.505321e-03             TRUE            TRUE
## 6   2.927824e-04 5.156983e-04             TRUE            TRUE
## 7   5.598047e-03 4.354542e-03             TRUE            TRUE
## 8   2.173162e-01 1.247885e-03            FALSE            TRUE
## 9   6.037509e-01 6.063973e-03            FALSE            TRUE
## 10  3.298352e-01 4.673084e-03            FALSE            TRUE
## 11  4.350037e-03 6.879441e-05             TRUE            TRUE
##    passed_ss_(Intercept) passed_ss_TypePlaque Enrichment       color
## 1                  FALSE                 TRUE     Plaque aquamarine3
## 2                   TRUE                 TRUE     Plaque aquamarine3
## 3                  FALSE                 TRUE     Plaque aquamarine3
## 4                   TRUE                 TRUE     Plaque aquamarine3
## 5                  FALSE                FALSE    Abscess       black
## 6                  FALSE                FALSE    Abscess       black
## 7                  FALSE                FALSE    Abscess       black
## 8                  FALSE                 TRUE    Abscess aquamarine3
## 9                   TRUE                 TRUE    Abscess aquamarine3
## 10                 FALSE                FALSE    Abscess       black
## 11                 FALSE                 TRUE    Abscess aquamarine3

6.1.3 Species

runANCOM(ps_fs, 
         tax_level = "Species", 
         group = "Type", 
         name_of_saved_results = "ancombc_results_sp", 
         plot_type = "dot",
         #Log2FC_cutoff = 0.5,
         plot_heights = c(1, 8),
         plot_colors = colors_all )
## [1] "Column found: diff_TypePlaque"
## [1] "Column found: lfc_TypePlaque"
## [1] "Column found: passed_ss_TypePlaque"
## [1] "Column found: se_TypePlaque"
## [1] "Column found: p_TypePlaque"
##                                                    taxon lfc_(Intercept)
## 1                         g__Peptidiphaga_s__sp._HMT_183      -1.8448397
## 2                                 g__Actinomyces_s__oris      -1.3536212
## 3                       g__Alloprevotella_s__sp._HMT_473      -1.3599760
## 4                              g__Leptotrichia_s__shahii      -0.9502987
## 5                          g__Fusobacterium_s__hwasookii      -1.1092449
## 6  g__Streptococcus_s__oralis_subsp._dentisani_clade_058      -0.8266808
## 7                          g__Streptococcus_s__sanguinis      -0.8753612
## 8                             g__Actinomyces_s__dentalis      -0.5173902
## 9                       g__Alloprevotella_s__sp._HMT_912      -0.6931416
## 10                           g__Porphyromonas_s__pasteri      -0.6386582
## 11                           g__Corynebacterium_s__durum      -0.7189964
## 12                                    g__Rothia_s__aeria      -0.7676985
## 13                           g__Leptotrichia_s__buccalis      -0.5209130
## 14                     g__Aggregatibacter_s__sp._HMT_458      -0.4940944
## 15                          g__Porphyromonas_s__catoniae      -0.4210803
## 16                              g__Johnsonella_s__ignava       0.9087141
## 17                        g__Oribacterium_s__sp._HMT_078       0.4048557
## 18                           g__Prevotella_s__nigrescens       1.0359064
## 19           g__Bacteroidales_[G-2]_s__bacterium_HMT_274       1.0074565
## 20                        g__Lacticaseibacillus_s__casei       0.9166909
##    lfc_TypePlaque se_(Intercept) se_TypePlaque W_(Intercept) W_TypePlaque
## 1        2.594708     0.15708173     0.3011581    -11.744457     8.615765
## 2        2.387516     0.10593807     0.2531289    -12.777476     9.432018
## 3        2.207220     0.16142296     0.3249669     -8.424923     6.792138
## 4        2.002699     0.18148383     0.3531969     -5.236272     5.670204
## 5        1.967548     0.19887050     0.3432198     -5.577725     5.732618
## 6        1.731892     0.22648233     0.3964184     -3.650089     4.368849
## 7        1.528310     0.12626142     0.3225113     -6.932927     4.738780
## 8        1.500823     0.06211937     0.2278509     -8.328966     6.586862
## 9        1.469285     0.07865831     0.2517449     -8.812058     5.836404
## 10       1.407025     0.12003929     0.3277739     -5.320409     4.292669
## 11       1.363698     0.06678451     0.3184555    -10.765916     4.282223
## 12       1.359753     0.08855955     0.2851544     -8.668727     4.768478
## 13       1.202753     0.11327599     0.2785033     -4.598618     4.318633
## 14       1.140487     0.09933327     0.2517136     -4.974108     4.530892
## 15       1.097138     0.10398282     0.2485192     -4.049518     4.414702
## 16      -1.448142     0.09608370     0.2419342      9.457526    -5.985685
## 17      -1.461320     0.18473112     0.3013674      2.191594    -4.848966
## 18      -1.622690     0.17761621     0.3748921      5.832274    -4.328419
## 19      -1.690295     0.19435654     0.3312254      5.183548    -5.103158
## 20      -2.412659     0.18565853     0.2972651      4.937510    -8.116185
##    p_(Intercept) p_TypePlaque q_(Intercept) q_TypePlaque diff_(Intercept)
## 1   2.704195e-08 9.839578e-07  3.596579e-06 1.338183e-04             TRUE
## 2   8.242909e-10 6.157942e-08  1.121036e-07 8.436380e-06             TRUE
## 3   2.815634e-07 4.326027e-06  3.604012e-05 5.840136e-04             TRUE
## 4   3.438152e-05 1.254908e-05  4.057019e-03 1.681577e-03             TRUE
## 5   3.333966e-05 2.440032e-05  4.000759e-03 3.220842e-03             TRUE
## 6   6.922006e-04 7.499465e-05  6.506685e-02 9.674309e-03            FALSE
## 7   1.063877e-07 4.866869e-05  1.393679e-05 6.375598e-03             TRUE
## 8   7.040781e-05 3.080492e-04  8.026490e-03 3.665785e-02             TRUE
## 9   1.014356e-05 2.479272e-04  1.247658e-03 3.049504e-02             TRUE
## 10  1.289016e-05 2.032453e-04  1.572599e-03 2.520241e-02             TRUE
## 11  3.100489e-10 3.028224e-04  4.247671e-08 3.633868e-02             TRUE
## 12  1.518206e-08 9.242630e-05  2.034397e-06 1.173814e-02             TRUE
## 13  2.229253e-04 4.136548e-04  2.340716e-02 4.881126e-02             TRUE
## 14  9.819572e-05 2.587631e-04  1.099792e-02 3.156909e-02             TRUE
## 15  6.842525e-04 2.976024e-04  6.500398e-02 3.600989e-02            FALSE
## 16  1.287071e-06 9.111654e-05  1.634580e-04 1.166292e-02             TRUE
## 17  4.355012e-02 1.776965e-04  1.000000e+00 2.221206e-02            FALSE
## 18  2.876575e-06 1.732254e-04  3.595719e-04 2.182640e-02             TRUE
## 19  6.251088e-05 7.430375e-05  7.188752e-03 9.659487e-03             TRUE
## 20  8.049882e-04 1.972273e-05  7.405892e-02 2.623124e-03            FALSE
##    diff_TypePlaque passed_ss_(Intercept) passed_ss_TypePlaque Enrichment
## 1             TRUE                 FALSE                 TRUE     Plaque
## 2             TRUE                 FALSE                 TRUE     Plaque
## 3             TRUE                  TRUE                 TRUE     Plaque
## 4             TRUE                 FALSE                FALSE     Plaque
## 5             TRUE                 FALSE                FALSE     Plaque
## 6             TRUE                 FALSE                 TRUE     Plaque
## 7             TRUE                  TRUE                 TRUE     Plaque
## 8             TRUE                 FALSE                FALSE     Plaque
## 9             TRUE                 FALSE                FALSE     Plaque
## 10            TRUE                  TRUE                 TRUE     Plaque
## 11            TRUE                  TRUE                 TRUE     Plaque
## 12            TRUE                  TRUE                 TRUE     Plaque
## 13            TRUE                 FALSE                 TRUE     Plaque
## 14            TRUE                 FALSE                 TRUE     Plaque
## 15            TRUE                 FALSE                FALSE     Plaque
## 16            TRUE                 FALSE                FALSE    Abscess
## 17            TRUE                  TRUE                 TRUE    Abscess
## 18            TRUE                 FALSE                FALSE    Abscess
## 19            TRUE                 FALSE                FALSE    Abscess
## 20            TRUE                 FALSE                FALSE    Abscess
##          color
## 1  aquamarine3
## 2  aquamarine3
## 3  aquamarine3
## 4        black
## 5        black
## 6  aquamarine3
## 7  aquamarine3
## 8        black
## 9        black
## 10 aquamarine3
## 11 aquamarine3
## 12 aquamarine3
## 13 aquamarine3
## 14 aquamarine3
## 15       black
## 16       black
## 17 aquamarine3
## 18       black
## 19       black
## 20       black

6.2 Maaslin2

6.2.1 Iterate Parameters

resolutions <- c("Species", "Genus", "Phylum")
iterate_maaslin2(ps_obj = ps_fs, 
                 iterative_methods = iterative_methods,
                 resolutions = resolutions,
                 group = "Type",
                 qval_threshold = .25, 
                 colors_all = maaslin2_colors, 
                 percentage = TRUE)

6.2.2 Run Maaslin2

Genus

run_Maaslin2(ps_obj = ps_fs, 
                                 taxa_level = "Genus", 
                                 group = "Type",
                                 analysis_method = "LM", 
                                 normalization = "CSS",
                                 transform = "LOG", 
                                 plot_colors = colors_all,
                                 plot_type = "dot", 
                                 qval_threshold= 0.25, 
                                 plot_heights = c(1, 9))

Species

run_Maaslin2(ps_obj = ps_fs, 
                                 taxa_level = "Species", 
                                 group = "Type",
                                 analysis_method = "LM", 
                                 normalization = "CSS",
                                 transform = "LOG", 
                                 plot_colors = colors_all,
                                 plot_type = "dot", 
                                 qval_threshold= 0.25, 
                                 plot_heights = c(1, 9))

6.3 AlDEX2

6.3.1 Iterate Parameters

https://www.nature.com/articles/s41467-022-28034-z

“…performed CLR transformation of each realization, and then performed Wilcoxon tests on the transformed realizations. The function then returned the expected Benjamini-Hochberg (BH) FDR-corrected p-value for each feature based on the results the different across Monte Carlo samples.”

ALDEx2 uses by default the centred log-ratio (clr) transformation which is scale invariant

resolutions <- c("Species", "Genus", "Phylum")

 
iterate_aldex2(ps_obj = ps_fs, 
                 iterative_methods = iterative_methods,
                 resolutions = resolutions,
                 group = "Type",
                 percentage = FALSE)

aldex2_iterative_summary_table %>%
  filter(resolution == "Genus") %>%
  filter(paired == FALSE) %>%
  filter(paired == FALSE) %>%
  filter(denom == "all") %>%
  filter(transform == "log10") %>%
  dplyr::arrange(desc(significant_features)) %>%
  filter(significant_features > 0)

6.3.2 Run Aldex2

Genus

run_aldex2(ps_fs, 
                  "Type", 
                  "Genus",
                  method = "wilcox.test",
                  transform = "log10",
                  normalization = "CSS", 
                 plot_colors = colors_all)

Species

run_aldex2(ps_fs, 
                  "Type", 
                  "Species",
                  method = "wilcox.test",
                  transform = "log10",
                  normalization = "CSS", 
            plot_colors = colors_all)

6.4 Combine results

Genus

DA_results_df_genus <- combine_DA(maaslin2_results_Genus, ancombc2_results_df_Genus, aldex2_res_Genus, "Type", "Genus", c(1,3), plot_colors=colors_all)
## # A tibble: 73 × 13
##    taxon    Maaslin2_coef Maaslin2_pval Maaslin2_value ANCOMBC2_DFF ANCOMBC2_LFC
##    <chr>            <dbl>         <dbl> <chr>          <lgl>               <dbl>
##  1 Haemoph…          3.02     0.0000411 Plaque         NA                  NA   
##  2 Dialist…         -3.72     0.0000169 Plaque         TRUE                -1.84
##  3 Coryneb…          2.57     0.0000356 Plaque         TRUE                 1.32
##  4 Lautrop…          2.90     0.0000717 Plaque         NA                  NA   
##  5 Leptotr…          2.71     0.0000792 Plaque         TRUE                 1.76
##  6 Rothia            3.55     0.000106  Plaque         NA                  NA   
##  7 Trepone…         -4.10     0.000179  Plaque         TRUE                -2.35
##  8 Kingella          2.35     0.000315  Plaque         NA                  NA   
##  9 Actinom…          2.39     0.000380  Plaque         TRUE                 1.75
## 10 Lancefi…         -2.82     0.000438  Plaque         NA                  NA   
## # ℹ 63 more rows
## # ℹ 7 more variables: ANCOMBC2_p_val <dbl>, ANCOMBC2_passed_SS <lgl>,
## #   Aldex2_Enrichment <chr>, Aldex2_EF <dbl>, Aldex2_padj <dbl>,
## #   confidence <chr>, all_positive_or_negative <lgl>

Species

DA_results_df <- combine_DA(maaslin2_results_Species, ancombc2_results_df_Species, aldex2_res_Species, "Type", "Species", c(1,4), plot_colors=colors_all)
## # A tibble: 211 × 13
##    taxon    Maaslin2_coef Maaslin2_pval Maaslin2_value ANCOMBC2_DFF ANCOMBC2_LFC
##    <chr>            <dbl>         <dbl> <chr>          <lgl>               <dbl>
##  1 Fusobac…          3.62    0.00000216 Plaque         NA                  NA   
##  2 Strepto…          2.65    0.0000236  Plaque         TRUE                 1.53
##  3 Dialist…         -3.52    0.0000878  Plaque         NA                  NA   
##  4 Oribact…         -3.06    0.000125   Plaque         TRUE                -1.46
##  5 Lautrop…          2.75    0.000270   Plaque         NA                  NA   
##  6 Haemoph…          2.68    0.000286   Plaque         NA                  NA   
##  7 Prevote…         -3.17    0.000294   Plaque         NA                  NA   
##  8 Allopre…         -3.32    0.000333   Plaque         NA                  NA   
##  9 Rothia …          2.47    0.000400   Plaque         TRUE                 1.36
## 10 Coryneb…          2.09    0.000573   Plaque         TRUE                 1.36
## # ℹ 201 more rows
## # ℹ 7 more variables: ANCOMBC2_p_val <dbl>, ANCOMBC2_passed_SS <lgl>,
## #   Aldex2_Enrichment <chr>, Aldex2_EF <dbl>, Aldex2_padj <dbl>,
## #   confidence <chr>, all_positive_or_negative <lgl>
plot_all_taxa(ps_obj = ps_fs,
              transformation = "TSS",
              group = "Type",
              taxa_level = "Species",
              plot_type = "dot",
              plot_colors = colors_all,
              taxa_filter = c("g__Fusobacterium_s__nucleatum", "g__Fusobacterium_s__periodonticum"))